***********************************************************************
RETPENPQ AND RETPENCQ ARE WRONG (I.E. MORE THAN 95%=0 FOR 1985), BUT
THE DATA FROM THE INCOME FILES LOOK RIGHT.  SO, I USE THESE DATA.

SAME GOES FOR CASHCO, BUT HERE YOU NEED TO DIVIDE BY 4 BECAUSE DATA ON
ITAB ARE AT ANNUAL LEVEL

12/13/07: CHANGED FROM GZIP MACRO TO USING GZIP COMMAND.
***********************************************************************;

* Library where the final data set of this program is saved ;
LIBNAME mydat '~/jim/data';

*******  FAMILY FILES   **********;
 
* Each fileref is a pipe, so the compressed text file is streamed
  through gunzip -c and never unpacked on disk.  The numeric suffix
  (851 to 855) is the argument later passed to the read macro.
  NOTE(review): presumably one file per 1985 interview quarter, to be
  confirmed against the study documentation ;
filename inc851 pipe 'gunzip -c ~/jim/data/ce8400/08904-0004-Data.txt.gz';
filename inc852 pipe 'gunzip -c ~/jim/data/ce8400/08904-0008-Data.txt.gz';
filename inc853 pipe 'gunzip -c ~/jim/data/ce8400/08904-0012-Data.txt.gz';
filename inc854 pipe 'gunzip -c ~/jim/data/ce8400/08904-0016-Data.txt.gz';
filename inc855 pipe 'gunzip -c ~/jim/data/ce8400/08904-0020-Data.txt.gz';
 
%MACRO read(qyr);
  /*-------------------------------------------------------------------
    READ(qyr): read one input fileref and total COST per NEWID for two
    groups of UCC codes.

    Parameter
      qyr   suffix selecting the input fileref (inc + qyr) and naming
            every data set created here, e.g. 851.

    Data sets created in WORK
      itab + qyr     filtered records (newid ucc cost qyear), sorted
                     by newid
      out_r + qyr    one row per newid, tot_retpen = sum of cost for
                     UCC 800910-800940
      out_cco + qyr  one row per newid, tot_cashco = sum of cost for
                     UCC 800801 and 800810-800860

    NOTE(review): the file header says CASHCO must be divided by 4
    because the source values are annual.  No division happens in this
    macro; confirm it is applied wherever tot_cashco is used.
  -------------------------------------------------------------------*/

  DATA itab&qyr;
    /* TRUNCOVER keeps each INPUT on its own record.  With the default
       FLOWOVER, a record shorter than column 31 would make SAS jump to
       the next record to finish the read, corrupting one observation
       and silently dropping the next. */
    INFILE inc&qyr TRUNCOVER;
    INPUT newid 1-8 ucc 13-18 cost 20-31;

    /* Subsetting IF: only records with these UCC codes are kept. */
    IF ucc IN (850300, 900001, 800801, 2120)
       OR 990900 LE ucc LE 990950
       OR 800700 LE ucc LE 800710
       OR 800810 LE ucc LE 800940;

    qyear = &qyr;
    /* Constant offset added to every id.
       NOTE(review): reason for the offset is not shown in this file. */
    newid = 9000000 + newid;
  RUN;

  /* BY-group processing below requires the records ordered by newid. */
  PROC SORT DATA=itab&qyr;
    BY newid;
  RUN;

  /* Per-newid total for UCC 800910-800940. */
  PROC MEANS DATA=itab&qyr SUM NOPRINT;
    BY newid;
    WHERE 800910 LE ucc LE 800940;
    VAR cost;
    OUTPUT OUT=out_r&qyr(DROP=_TYPE_ _FREQ_) SUM=tot_retpen;
  RUN;

  /* Printed summary of the totals just created, as a visual check. */
  PROC MEANS DATA=out_r&qyr;
  RUN;

  /* Per-newid total for UCC 800801 and 800810-800860. */
  PROC MEANS DATA=itab&qyr SUM NOPRINT;
    BY newid;
    WHERE ucc=800801 OR 800810 LE ucc LE 800860;
    VAR cost;
    OUTPUT OUT=out_cco&qyr(DROP=_TYPE_ _FREQ_) SUM=tot_cashco;
  RUN;

  /* Printed summary of the totals just created, as a visual check. */
  PROC MEANS DATA=out_cco&qyr;
  RUN;

%MEND read;
 
* Run the read macro once for each of the five input filerefs
  (inc851 through inc855), producing one out_r and one out_cco
  data set per call ;
%read(851);
%read(852);
%read(853);
%read(854);
%read(855);

* Stack the five per-file tot_retpen data sets and order by newid ;
data fix_retire;
   set out_r851
       out_r852
       out_r853
       out_r854
       out_r855;
run;

proc sort data=fix_retire;
   by newid;
run;

* Stack the five per-file tot_cashco data sets and order by newid ;
data fix_cashco;
   set out_cco851
       out_cco852
       out_cco853
       out_cco854
       out_cco855;
run;

proc sort data=fix_cashco;
   by newid;
run;

* Match-merge the two totals on newid and save the result in the
  mydat library.  A newid present in only one input keeps a missing
  value for the other total ;
data mydat.fix_retire;
   merge fix_retire
         fix_cashco;
   by newid;
run;

* Compress the saved data set on disk through a shell command.  The
  mydat libref points at ~/jim/data, so mydat.fix_retire is the file
  named below.  The -f flag lets gzip replace an existing .gz copy ;
X "gzip -f ~/jim/data/fix_retire.sas7bdat";

